import wordcloud
import jieba
from pyspark import SparkConf, SparkContext
#from scipy.misc import imread
import matplotlib.pyplot as plt

# Configure a Spark application named "quzhi" that uses the "local" master.
conf = SparkConf()
conf.setMaster("local")
conf.setAppName("quzhi")

# Start the Spark context and open the input file as an RDD of text lines.
sc = SparkContext(conf=conf)
lines = sc.textFile("58data.txt")

def GetLine(line):
    """Return the part of ``line`` that precedes its first tab character.

    When ``line`` contains no tab, the whole line is returned unchanged.
    """
    first_field, _, _ = line.partition("\t")
    return first_field

# Keep only the first tab-separated field of every input line, then pull the
# results back to the driver as a plain Python list.
pairRDD = lines.map(GetLine)
p = pairRDD.collect()

# Merge all fields into one space-separated string to feed the word cloud.
s = ' '.join(p)
# str.replace returns a new string (str is immutable), so the result has to be
# re-bound; calling it as a bare statement leaves the commas in place.
s = s.replace(',', ' ')
print(s)


# NOTE: disabled code kept as a bare triple-quoted string, so it is evaluated
# as a string expression and never executed. It mirrors the active pipeline
# above but returns the field at index 2 of each tab-separated line instead of
# index 0, and has its final print commented out.
'''def GetLine(line):
    infos =line.split("\t")
    return infos[2]

pairRDD = lines.map(lambda line:GetLine(line))
#print(pairRDD.collect())
p = pairRDD.collect()
s = ' '.join(p)
s.replace(',',' ')
#print(s)'''


# NOTE: disabled code kept as a bare triple-quoted string, so it is never
# executed. It opens 58data.txt directly with open() as UTF-8 and appends each
# stripped line to the list `txt`, without going through Spark.
'''f=open('58data.txt', encoding='UTF-8')
txt=[]
l = []
for line in f:
    txt.append(line.strip())'''




# Rendering options for the word cloud. background_color is not set, so the
# library default (black) is used.
cloud_options = {
    "width": 900,
    "height": 600,
    "max_words": 100,           # upper bound on the number of words drawn
    "font_path": 'simhei.ttf',  # font used for rendering, chosen to display Chinese text
    "max_font_size": 99,        # largest font size used for a word
    "min_font_size": 16,        # smallest font size used for a word
    "random_state": 50,         # fixed seed for the cloud's random choices
}
w = wordcloud.WordCloud(**cloud_options)

# Build the cloud from the space-separated text and write it out as an image.
w.generate(s)
w.to_file("2.png")


